from urllib.parse import urljoin

import pandas as pd
import requests
from pyquery import PyQuery as pq

# Tutorial page to download and parse.
url = "https://www.runoob.com/html/html-tutorial.html"

# Request headers sent with the download: a desktop Chrome user-agent plus a
# cookie string.
# NOTE(review): the cookie looks like it was captured from a browser session
# and may be stale -- confirm whether the site actually requires it.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36",
    "cookie": "_ga=GA1.2.1786683706.1542539383; Hm_lvt_3eec0b7da6548cf07db3bc477ea905ee=1625406275; __gads=ID=95c3dd0e3c237bdd-225ef46348c200ac:T=1625406134:RT=1593659382:R:S=ALNI_MZ3KhrB2MM3PzPYXyg5REIFYiA2hw; runoob-uuid=2684fe00-bd64-4568-8cad-5a6a404efbd3; _gid=GA1.2.1934514202.1631881135; __gpi=00000000-0000-0000-0000-000000000000&cnVub29iLmNvbQ==&Lw==; SERVERID=fb669a01438a4693a180d7ad8d474adb|1631880989|1631880989",
}

# Without a timeout requests waits forever on an unresponsive server, so bound
# the wait (seconds) to let the script fail instead of hanging.
response = requests.get(url=url, headers=headers, timeout=10)

# Fail loudly on 4xx/5xx rather than parsing an error page and silently
# producing an empty or misleading spreadsheet.
response.raise_for_status()

# Parse the downloaded HTML so it can be queried with CSS selectors.
result = pq(response.text)
# Collect the link text and absolute URL of every <a> element found under the
# element with id "leftcolumn". The two lists are filled in lockstep, so
# titles[k] always describes urls[k].
titles = []
urls = []

for anchor in result("#leftcolumn a"):
    link = pq(anchor)
    href = link.attr("href")
    if href is None:
        # An anchor without an href has no target to record; skip it entirely
        # (instead of crashing on str + None) so both lists stay aligned.
        continue
    titles.append(link.text())
    # Resolve against the site root instead of concatenating strings:
    # root-relative hrefs ("/html/x.html") no longer produce a double slash,
    # already-absolute hrefs are kept as-is, and plain relative hrefs give
    # the same result as the previous concatenation.
    urls.append(urljoin("https://www.runoob.com/", href))

# Build a two-column table from the scraped links: the "地址" column holds the
# URLs and the "标题" column holds the link titles.
df = pd.DataFrame({"地址": urls, "标题": titles})

# Write the table to a single sheet named "spiderWork" in the current
# directory, with the column header row and without the DataFrame index.
# NOTE(review): writing the legacy .xls format needs an xls-capable writer
# engine, which newer pandas releases may no longer provide -- confirm against
# the installed pandas version.
df.to_excel("./spiderWork.xls", sheet_name="spiderWork", header=True, index=False)